# Read the raw .dta file and list its column names.
# NOTE(review): paste0() inserts no separator, so data.raw_path presumably
# already ends with a path separator -- confirm where it is defined.
dat.raw <- read_dta(
  file = paste0(data.raw_path, "preliminary_Katie_12Nov21.dta")
)
names(dat.raw)

[1] “familyid” “atwinid” “btwinid” “rorderp5” “sampsex” “zygosity”
[7] “seswq35” “sethnic” “sisoe5” “inem5” “hyem5” “tadhdem5”
[13] “inet5” “hyet5” “tadhdet5” “sisoe7” “inem7” “hyem7”
[19] “tadhdem7” “inet7” “hyet7” “tadhdet7” “sisoe10” “inem10”
[25] “hyem10” “tadhdem10” “inet10” “hyet10” “tadhdet10” “sisoe12”
[31] “inem12” “hyem12” “tadhdem12” “inet12” “hyet12” “tadhdet12”

# Data frames to combine; join_all() folds them left to right, so the first
# entry is the left-hand table of the merge.
dataframe_list <- list(dat.raw, dat.rds)

# Merge on the key column "atwinid" (`by` may also be a vector of several
# columns). `type` and `match` are written out at their join_all() defaults.
dat <- plyr::join_all(
  dataframe_list,
  by = "atwinid",
  type = "left",
  match = "all"
)

# Column names of the merged data
names(dat)

[1] “familyid” “atwinid” “btwinid”
[4] “rorderp5” “sampsex” “zygosity”
[7] “seswq35” “sethnic” “sisoe5”
[10] “inem5” “hyem5” “tadhdem5”
[13] “inet5” “hyet5” “tadhdet5”
[16] “sisoe7” “inem7” “hyem7”
[19] “tadhdem7” “inet7” “hyet7”
[22] “tadhdet7” “sisoe10” “inem10”
[25] “hyem10” “tadhdem10” “inet10”
[28] “hyet10” “tadhdet10” “sisoe12”
[31] “inem12” “hyem12” “tadhdem12”
[34] “inet12” “hyet12” “tadhdet12”
[37] “isolation_mother_05” “isolation_mother_07” “isolation_mother_10”
[40] “isolation_mother_12” “isolation_teacher_05” “isolation_teacher_07”
[43] “isolation_teacher_10” “isolation_teacher_12”

# Column names of the social isolation and ADHD variables used below.
# Layout: the seven per-age variables for ages 5, 7, 10 and 12 (28 names),
# then the mother- and teacher-specific isolation columns (8 names).
si.adhd.variables <- local({
  ages <- c(5L, 7L, 10L, 12L)
  stems <- c("sisoe", "inem", "hyem", "tadhdem", "inet", "hyet", "tadhdet")

  # outer() fills column-wise, so flattening gives every stem for age 5,
  # then every stem for age 7, and so on.
  per_age <- as.vector(outer(stems, ages, paste0))

  # These columns use a zero-padded, two-digit age suffix (e.g. "_05").
  by_reporter <- sprintf(
    "isolation_%s_%02d",
    rep(c("mother", "teacher"), each = length(ages)),
    rep(ages, times = 2L)
  )

  c(per_age, by_reporter)
})

# Readable labels for the analysis variables, in the same order as
# si.adhd.variables (they are later assigned as column names by position).
name.list <- local({
  ages <- c(5, 7, 10, 12)
  measures <- c(
    "Social isolation",
    "Inattention mother report",
    "Hyperactivity mother report",
    "Total ADHD mother report",
    "Inattention teacher report",
    "Hyperactivity teacher report",
    "Total ADHD teacher report"
  )

  # outer() fills column-wise: all measures for age 5, then age 7, ...
  per_age <- as.vector(
    outer(measures, ages, function(measure, age) paste(measure, "age", age))
  )

  # Reporter-specific social isolation labels: mother first, then teacher.
  by_reporter <- paste(
    rep(
      c("Social isolation mother report", "Social isolation teacher report"),
      each = length(ages)
    ),
    "age",
    rep(ages, times = 2)
  )

  c(per_age, by_reporter)
})

Demographic variables

The sample group, cohort, sex, zygosity, SES, and ethnicity variables below are recoded into factors for use in R and given more familiar names. Only the categorical variables need recoding, in order to establish their levels. To see the code for this, click the “code” button on the right-hand side.

Twin order

# # twin order
# dat <- dat %>%
#   mutate(
#     twin_order = 
#       recode_factor(torder,
#         "1" = "Elder",
#         "2" = "Younger"))

Sample group (Low/high risk)

# # sample groups
# dat <- dat %>%
#   mutate(
#     risk = # this represents mothers who had their first child under 20 years old
#       recode_factor(risks,
#         "0" = "Low risk",
#         "1" = "High risk"))

Cohort

# # cohort
# dat <- dat %>%
#   mutate(
#     cohort_binary = 
#       recode_factor(cohort,
#         "94" = "Born in 1994",
#         "95" = "Born in 1995"))

Sex

# # sex
# dat <- dat %>%
#   dplyr::mutate(
#     sex = 
#       recode_factor(sampsex,
#         "1" = "Male",
#         "2" = "Female"))

Zygosity

# # zygosity
# dat <- dat %>%
#   mutate(
#     zygosity_binary = 
#       recode_factor(zygosity,
#         "1" = "MZ",
#         "2" = "DZ"))

SES

# # SES
# dat <- dat %>%
#   mutate(
#     SES = 
#       recode_factor(seswq35,
#         "1" = "Low",
#         "2" = "Middle", #this was missing in original SPSS file but still have 2s in the data set
#         "3" = "High"))

Ethnicity

# # ethnicity
# dat <- dat %>%
#   mutate(
#     ethnicity = 
#       recode_factor(sethnic,
#         "1" = "White",
#         "2" = "Asian", 
#         "3" = "Black",
#         "4" = "Mixed race",
#         "5" = "Other"))

A new variable was created for SES: SES_ordered is an ordered version of SES, so that the labels “Low”, “Middle”, and “High” are treated as ordered levels rather than unordered categories.

# dat <- dat %>%
#   mutate(
#     SES_ordered = 
#       ordered(SES,
#               levels = c("Low",
#                          "Middle",
#                          "High")
#       )
#   )
# Build a data frame holding only the social isolation / ADHD variables and
# relabel its columns with the readable names (name.list is matched to
# si.adhd.variables by position).
si.adhd_data_frame <- as.data.frame(dat[, si.adhd.variables])
colnames(si.adhd_data_frame) <- name.list

# Spearman correlation matrix, computed on pairwise-complete observations
cor_matrix <- cor(
  si.adhd_data_frame,
  method = "spearman",
  use = "pairwise.complete.obs"
)

# Reorder the correlation matrix (reorder_cor_matrix() is defined elsewhere)
cor_matrix_full <- reorder_cor_matrix(cor_matrix)

# Melt the matrix to long format; the plot below reads columns X1, X2 and value
metled_cor_matrix <- reshape::melt(cor_matrix_full, na.rm = TRUE)

# Correlation heat map: one tile per variable pair, coloured and labelled with
# the correlation value
correlation_heat_map <- ggplot(metled_cor_matrix, aes(X2, X1, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "blue", high = "red", mid = "white",
    midpoint = 0,
    # `limits` written in full: the former `limit` only worked through
    # partial argument matching.
    limits = c(-1, 1),
    space = "Lab",
    name = "Spearman\nCorrelation"
  ) +
  theme_minimal() +
  labs(
    y = "",
    x = "",
    title = "Correlation heat map of ADHD and social isolation variables"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1),
    axis.text.y = element_text(size = 12),
    plot.margin = grid::unit(c(0, 0, 0, 0), "mm"),
    # no trailing comma after the last argument (it passed an empty argument)
    plot.title = element_text(size = 20)
  ) +
  coord_fixed() +
  geom_text(
    aes(X2, X1, label = round(value, digits = 2)),
    color = "black",
    size = 3
  )
correlation_heat_map

# Keep only the social isolation columns of the reordered correlation matrix;
# rows remain the full set of variables, identified by row name.
si.cor_matrix_total <- as.data.frame(cor_matrix_full) %>%
  select(
    `Social isolation age 5`,
    `Social isolation age 7`,
    `Social isolation age 10`,
    `Social isolation age 12`
  )

# Create correlation matrices for mother-reported total ADHD, inattention and
# hyperactivity. Rows are picked by name rather than by position, because the
# row order is whatever reorder_cor_matrix() returned and positional indices
# would silently pick other variables if that order changed.
si.tot.cor_matrix_total <- si.cor_matrix_total[
  paste("Total ADHD mother report age", c(5, 7, 10, 12)),
] # total
si.inat.cor_matrix_total <- si.cor_matrix_total[
  paste("Inattention mother report age", c(5, 7, 10, 12)),
] # inattention
si.hyp.cor_matrix_total <- si.cor_matrix_total[
  paste("Hyperactivity mother report age", c(5, 7, 10, 12)),
] # hyperactivity
kable(round(si.tot.cor_matrix_total, 2))
Social isolation age 5 Social isolation age 7 Social isolation age 10 Social isolation age 12
Total ADHD mother report age 5 0.24 0.22 0.22 0.20
Total ADHD mother report age 7 0.21 0.26 0.28 0.24
Total ADHD mother report age 10 0.16 0.23 0.29 0.27
Total ADHD mother report age 12 0.16 0.24 0.25 0.28
# Table of the inattention correlations, rounded to two decimal places
kable(round(si.inat.cor_matrix_total, digits = 2))
Social isolation age 5 Social isolation age 7 Social isolation age 10 Social isolation age 12
Inattention mother report age 5 0.22 0.20 0.20 0.20
Inattention mother report age 7 0.20 0.26 0.26 0.24
Inattention mother report age 10 0.17 0.22 0.28 0.26
Inattention mother report age 12 0.14 0.20 0.23 0.27
# Table of the hyperactivity correlations, rounded to two decimal places
kable(round(si.hyp.cor_matrix_total, digits = 2))
Social isolation age 5 Social isolation age 7 Social isolation age 10 Social isolation age 12
Hyperactivity mother report age 5 0.20 0.20 0.19 0.17
Hyperactivity mother report age 7 0.19 0.22 0.24 0.21
Hyperactivity mother report age 10 0.13 0.21 0.24 0.22
Hyperactivity mother report age 12 0.13 0.22 0.21 0.23
# Keep only the mother-reported social isolation columns of the reordered
# correlation matrix; rows remain the full set of variables, identified by
# row name.
si.cor_matrix_mother <- as.data.frame(cor_matrix_full) %>%
  select(
    `Social isolation mother report age 5`,
    `Social isolation mother report age 7`,
    `Social isolation mother report age 10`,
    `Social isolation mother report age 12`
  )

# Create correlation matrices for mother-reported total ADHD, inattention and
# hyperactivity. Rows are picked by name rather than by position, because the
# row order is whatever reorder_cor_matrix() returned and positional indices
# would silently pick other variables if that order changed.
si.tot.cor_matrix_mother <- si.cor_matrix_mother[
  paste("Total ADHD mother report age", c(5, 7, 10, 12)),
] # total
si.inat.cor_matrix_mother <- si.cor_matrix_mother[
  paste("Inattention mother report age", c(5, 7, 10, 12)),
] # inattention
si.hyp.cor_matrix_mother <- si.cor_matrix_mother[
  paste("Hyperactivity mother report age", c(5, 7, 10, 12)),
] # hyperactivity
kable(round(si.tot.cor_matrix_mother, 2))
Social isolation mother report age 5 Social isolation mother report age 7 Social isolation mother report age 10 Social isolation mother report age 12
Total ADHD mother report age 5 0.25 0.23 0.18 0.21
Total ADHD mother report age 7 0.22 0.27 0.24 0.27
Total ADHD mother report age 10 0.17 0.21 0.27 0.27
Total ADHD mother report age 12 0.18 0.22 0.21 0.30
# Table of the inattention correlations, rounded to two decimal places
kable(round(si.inat.cor_matrix_mother, digits = 2))
Social isolation mother report age 5 Social isolation mother report age 7 Social isolation mother report age 10 Social isolation mother report age 12
Inattention mother report age 5 0.24 0.20 0.16 0.20
Inattention mother report age 7 0.20 0.25 0.20 0.24
Inattention mother report age 10 0.15 0.20 0.25 0.26
Inattention mother report age 12 0.16 0.18 0.20 0.28
# Table of the hyperactivity correlations, rounded to two decimal places
kable(round(si.hyp.cor_matrix_mother, digits = 2))
Social isolation mother report age 5 Social isolation mother report age 7 Social isolation mother report age 10 Social isolation mother report age 12
Hyperactivity mother report age 5 0.20 0.21 0.16 0.18
Hyperactivity mother report age 7 0.19 0.23 0.21 0.24
Hyperactivity mother report age 10 0.14 0.19 0.22 0.22
Hyperactivity mother report age 12 0.16 0.19 0.17 0.25
# Combined table: total, inattention and hyperactivity rows stacked in that
# order, rounded to two decimal places
kable(
  round(
    rbind(
      si.tot.cor_matrix_mother,
      si.inat.cor_matrix_mother,
      si.hyp.cor_matrix_mother
    ),
    digits = 2
  )
)
Social isolation mother report age 5 Social isolation mother report age 7 Social isolation mother report age 10 Social isolation mother report age 12
Total ADHD mother report age 5 0.25 0.23 0.18 0.21
Total ADHD mother report age 7 0.22 0.27 0.24 0.27
Total ADHD mother report age 10 0.17 0.21 0.27 0.27
Total ADHD mother report age 12 0.18 0.22 0.21 0.30
Inattention mother report age 5 0.24 0.20 0.16 0.20
Inattention mother report age 7 0.20 0.25 0.20 0.24
Inattention mother report age 10 0.15 0.20 0.25 0.26
Inattention mother report age 12 0.16 0.18 0.20 0.28
Hyperactivity mother report age 5 0.20 0.21 0.16 0.18
Hyperactivity mother report age 7 0.19 0.23 0.21 0.24
Hyperactivity mother report age 10 0.14 0.19 0.22 0.22
Hyperactivity mother report age 12 0.16 0.19 0.17 0.25
 

Work by Katherine N Thompson

katherine.n.thompson@kcl.ac.uk